use strict;
use warnings;
use File::Path qw(make_path);
use WWW::Mechanize;
# Script-wide state.
#   @local      - lines of location_current.txt with the trailing newline removed
#   %have_local - maps each of those entries to the number of times it occurs;
#                 the crawl loop only tests the value for truth
my @local      = ();
my %have_local = ();

# Choose the start and end year manually.
# Caveat noted by the original author: years whose search yields only a single
# page of results (e.g. 1965-1974) need separate handling in the crawl loop.
my $startyear = 1975;
my $endyear   = 2014;
my $dosidcount;      # counter used while scanning one result page
my $newfiles = 0;    # number of datasheets saved during this run

# Start every run with an empty missing-documents log; entries are appended
# to it later, one file name per line.
{
    my $missdoc_path = 'D:/prelex2013/missdoc.txt';
    open my $missdoc_fh, '>', $missdoc_path
        or die "Cannot truncate $missdoc_path: $!";
    close $missdoc_fh
        or die "Cannot close $missdoc_path: $!";
}

# Load the inventory of documents that are already available locally.
{
    my $inventory_path = 'D:/prelex2013/location_current.txt';
    open my $inventory_fh, '<', $inventory_path
        or die "Cannot read $inventory_path: $!";
    @local = <$inventory_fh>;
    close $inventory_fh;
}
chomp @local;
$have_local{$_}++ for @local;
# Crawl the PreLex search results year by year and save the detail page of
# every dossier whose document is not yet listed in %have_local.
#
# For each year from $startyear up to, but not including, $endyear the script
# submits the RECHERCHE form on rech_simple.cfm (clef1 = 'COM', clef2 = year,
# nbr_element = 99), walks all result pages, fetches detail_dossier_real.cfm
# for every dossier ID found there, derives a file name from the first COM/SEC
# reference on that page and, when the name is unknown locally, appends it to
# missdoc.txt and stores the page as datasheets/<year>/<name>.html.
# When all years are done the number of newly saved files is printed.

# Return the number of result pages implied by the pagination links
# (liste_resultats.cfm?PCP...) found in $html.
#
# The formula int(links / 2 + 1.5) is the one this script has always used;
# presumably every pagination link is rendered twice per page (TODO confirm
# against a live result page). A page without any pagination link counts as
# exactly one page, so years with a single result page are processed too.
sub _result_page_count {
    my ($html) = @_;
    my $link_count = () = $html =~ /liste_resultats\.cfm\?PCP/g;
    return int( $link_count / 2 + 1.5 );
}

# Build the local file name for a dossier from the first document reference
# of the form "COM (1999) 123" or "SEC (1999) 123 - 2" found in $html.
#
# Returns the list (file name, document year), e.g. ('COM_1999_123_2', 1999),
# or an empty list when the page contains no such reference.
sub _datasheet_name {
    my ($html) = @_;

    # The tail is optional (.{0,5}) so that a reference close to the end of a
    # line cannot force the document number to give back digits.
    return
        unless $html
        =~ /(COM|SEC)(?:\s|&nbsp;)\(([0-9]{4})\)(?:\s|&nbsp;)([0-9]{1,4})(.{0,5})/;
    my ( $doc_type, $doc_year, $doc_number, $tail ) = ( $1, $2, $3, $4 );

    my $filename = join '_', $doc_type, $doc_year, $doc_number;

    # A sub-number is only recognised inside the five characters that follow
    # the document number, e.g. " - 2" or " - 12".
    if ( $tail =~ /(?:\s|&nbsp;)-(?:\s|&nbsp;)([0-9]{1,2})/ ) {
        $filename .= "_$1";
    }
    return ( $filename, $doc_year );
}

# Record $filename in missdoc.txt and write $html to
# D:/prelex2013/datasheets/$doc_year/$filename.html, creating the year
# directory when it does not exist yet. Dies when a file cannot be written.
sub _store_missing_datasheet {
    my ( $filename, $doc_year, $html ) = @_;

    my $missdoc_path = 'D:/prelex2013/missdoc.txt';
    open my $missdoc_fh, '>>', $missdoc_path
        or die "Cannot append to $missdoc_path: $!";
    print {$missdoc_fh} "$filename\n";
    close $missdoc_fh
        or die "Cannot close $missdoc_path: $!";

    my $sheet_dir = "D:/prelex2013/datasheets/$doc_year";
    make_path($sheet_dir);    # nothing happens if the directory is already there
    my $sheet_path = "$sheet_dir/$filename.html";
    open my $sheet_fh, '>', $sheet_path
        or die "Cannot write $sheet_path: $!";
    print {$sheet_fh} $html;
    close $sheet_fh
        or die "Cannot close $sheet_path: $!";
    return;
}

# One agent serves all dossier detail requests; stack_depth => 0 stops it from
# keeping every fetched page in its history.
my $detail_agent = WWW::Mechanize->new( stack_depth => 0 );

# NOTE(review): the comparison is '<', so $endyear itself is never searched.
while ( $startyear < $endyear ) {

    # A fresh agent per year: submit the search form for this year.
    my $agent = WWW::Mechanize->new();
    $agent->get("http://ec.europa.eu/prelex/rech_simple.cfm?CL=en");
    $agent->form_name("RECHERCHE");
    $agent->field( "clef2",       $startyear );
    $agent->field( "clef1",       'COM' );
    $agent->field( "nbr_element", '99' );
    $agent->click();

    my $page_count = _result_page_count( $agent->content );

    for my $page ( 1 .. $page_count ) {
        print "\nSearching $startyear, page $page\n";

        # Every dossier is linked more than once in the result list (the
        # previous implementation relied on that by taking every second
        # match), so each ID is handled only once per page.
        my %seen_dosid;
        my $checked_docs = 0;

        for my $line ( split /^/, $agent->content ) {
            next unless $line =~ /DosID=(\d+)/;
            my $dosid = $1;
            next if $seen_dosid{$dosid}++;

            # Number of dossiers already handled on this page.
            my $checked_before = $checked_docs++;

            $detail_agent->get(
                "http://ec.europa.eu/prelex/detail_dossier_real.cfm?CL=en&DosId=$dosid"
            );
            my $content_full = $detail_agent->content();

            # Dossier pages without a COM/SEC reference are skipped silently.
            my ( $filename, $doc_year ) = _datasheet_name($content_full);
            next unless defined $filename;

            print "Checked Files: $checked_before      New Files: $newfiles      Checking $filename\n";

            next if $have_local{$filename};
            _store_missing_datasheet( $filename, $doc_year, $content_full );
            $newfiles++;
        }

        # Advance to the next result page. The n-th pagination link is used
        # on page n - presumably because the links to the pages before the
        # current one precede the link to the following page (TODO confirm).
        # After the last page there is nothing left to follow.
        if ( $page < $page_count ) {
            $agent->follow_link(
                url_regex => qr/liste_resultats\.cfm\?PCP/,
                n         => $page,
            );
        }
    }

    $startyear++;
}

print "$newfiles\n";